# -*- coding: utf-8 -*-
"""
Created on Thu Jun 13 17:07:14 2019

@author: jlani
"""

from os.path import isfile
from numpy.random import uniform
import numpy as np
import pandas as pd
from scipy.optimize import minimize as mini
from nested_crra import ccei_nested_crra, varian_nested_crra

#=============================================================================
#INPUT FILE
#=============================================================================
df = pd.read_csv( 'ITCR_cdata.csv' )
#Get rid of no risk questions
df = df[ (df['pz'] != 66666.) & (df['pw'] != 66666.) ]
ids = sorted( set( df['participantid'] ) )

#Put in edges
df['edges'] = 0
zeros_count = (df[['x','y','z','w']] == 0.).sum(1)
df.loc[ zeros_count >= 1, 'edges' ] = 1

#=============================================================================
#RESULTS FILE
#=============================================================================

results_file = 'nested_crra_by_participant.xlsx'
columns = [ 'km_ccei', 'km_dr_ccei', 'km_time_ccei', 'km_risk_ccei', \
            'km_varian', 'km_dr_varian', 'km_time_varian', 'km_risk_varian', \
            'deu_ccei', 'deu_dr_ccei', 'deu_curve_ccei', \
            'deu_varian', 'deu_dr_varian', 'deu_curve_varian', 'cat' ]

if not isfile(results_file):
    results_df = pd.DataFrame( np.nan, columns = columns, index = ids )
    results_df.index.name = 'participant id'
else:
    results_df = pd.read_excel( results_file, sheet_name = 0, index_col = 0 )

#=============================================================================
#OPTIMIZE
#=============================================================================

#Instability at crrar = 0.0 and 1.0. Make sure the optimization routine
#does not run through these values.

bounds_to_use = [ (0.0, 0.0), (0.01,0.99), (1.0,1.0), (1.01,3.0), (3.0,10.0) ]

for pid in ids:
    df_cur = df[ df['participantid'] == pid]
    results_df.loc[pid, 'cat'] = 'edge0'
    if df_cur['edges'].sum() == 41:
        results_df.loc[pid, 'cat'] = 'edge41'
    if pd.isnull( results_df.loc[pid,:] ).any():
        print('working on pid {0}'.format(pid))
        P = df_cur.loc[ :, 'px':'pw' ].to_numpy()
        C = df_cur.loc[ :, 'x':'w' ].to_numpy()
        
        #================
        #AFRIAT KM
        #================
        print('Afriat KM')
        if pd.isnull( results_df.loc[pid,['km_ccei','km_dr_ccei', 'km_time_ccei', 'km_risk_ccei']] ).any():
            obj_km_afriat = lambda inputs: -ccei_nested_crra(C = C, P = P, discount_rate = inputs[0], crra_time = inputs[1], crra_risk = inputs[2])
            best_sol = -1.0
            for tbounds in bounds_to_use:
                for rbounds in bounds_to_use:
                    bounds = ( (0.0,1.0), tbounds, rbounds )
                    for count in range(5):
                        init = np.array( [ uniform(*bounds[0]), uniform(*bounds[1]), uniform(*bounds[2]) ] )
                        result = mini( fun = obj_km_afriat, x0 = init, method = 'Powell', bounds = bounds )
                        if best_sol < -result.fun:
                            best_sol = -result.fun
                            km_dr_ccei, km_time_ccei, km_risk_ccei = result.x
    
            results_df.loc[pid, ['km_ccei','km_dr_ccei', 'km_time_ccei', 'km_risk_ccei']] = \
            np.array( [best_sol, km_dr_ccei, km_time_ccei, km_risk_ccei ] )
        
        
        #================
        #VARIAN KM
        #================
        
        print('Varian KM')
        if pd.isnull( results_df.loc[pid,['km_varian','km_dr_varian', 'km_time_varian', 'km_risk_varian']] ).any():
            obj_km_varian = lambda inputs: -varian_nested_crra(C = C, P = P, discount_rate = inputs[0], crra_time = inputs[1], crra_risk = inputs[2])
            best_sol = -1.0
            for tbounds in bounds_to_use:
                for rbounds in bounds_to_use:
                    bounds = ( (0.0,1.0), tbounds, rbounds )
                    for count in range(5):
                        init = np.array( [ uniform(*bounds[0]), uniform(*bounds[1]), uniform(*bounds[2]) ] )
                        result = mini( fun = obj_km_varian, x0 = init, method = 'Powell', bounds = bounds )
                        if best_sol < -result.fun:
                            best_sol = -result.fun
                            km_dr_varian, km_time_varian, km_risk_varian = result.x
    
            results_df.loc[pid, ['km_varian','km_dr_varian', 'km_time_varian', 'km_risk_varian']] = \
            np.array( [best_sol, km_dr_varian, km_time_varian, km_risk_varian ] )
            
        #================
        #AFRIAT DEU
        #================
        
        print('Afriat DEU')
        if pd.isnull( results_df.loc[pid,['deu_ccei','deu_dr_ccei', 'deu_curve_ccei' ]] ).any():
            obj_deu_afriat = lambda inputs: -ccei_nested_crra(C = C, P = P, discount_rate = inputs[0], crra_time = inputs[1], crra_risk = inputs[1])
            best_sol = -1.0
            for bounds1 in bounds_to_use:
                bounds = ( (0.0,1.0), bounds1 )
                for count in range(5):
                    init = np.array( [ uniform(*bounds[0]), uniform(*bounds[1]) ] )
                    
                    result = mini( fun = obj_deu_afriat, x0 = init, method = 'Powell', bounds = bounds )
                    if best_sol < -result.fun:
                        best_sol = -result.fun
                        deu_dr_ccei, deu_curve_ccei = result.x
            results_df.loc[pid, ['deu_ccei','deu_dr_ccei', 'deu_curve_ccei']] = \
            np.array( [best_sol, deu_dr_ccei, deu_curve_ccei ] )
        
        #================
        #VARIAN DEU
        #================
        
        print('Varian DEU')
        if pd.isnull( results_df.loc[pid,['deu_varian','deu_dr_varian', 'deu_curve_varian' ]] ).any():
            obj_deu_varian = lambda inputs: -varian_nested_crra(C = C, P = P, discount_rate = inputs[0], crra_time = inputs[1], crra_risk = inputs[1])
            best_sol = -1.0
            for bounds1 in bounds_to_use:
                bounds = ( (0.0,1.0), bounds1 )
                
                for count in range(5):
                    init = np.array( [ uniform(*bounds[0]), uniform(*bounds[1]) ] )
                    result = mini( fun = obj_deu_varian, x0 = init, method = 'Powell', bounds = bounds )
                    if best_sol < -result.fun:
                        best_sol = -result.fun
                        deu_dr_varian, deu_curve_varian = result.x
            results_df.loc[pid, ['deu_varian','deu_dr_varian', 'deu_curve_varian' ]] = \
            np.array( [best_sol, deu_dr_varian, deu_curve_varian ] )
        
        results_df.to_excel( results_file )
results_df.to_excel( results_file )

#=============================================================================
#SUMMARIZE
#=============================================================================

summary_file = 'nested_crra_summary.xlsx'

quartiles = np.array([ 25, 50, 75 ])
cats = ['all', 'edge41', 'edge0']

my_index = pd.MultiIndex.from_product( [cats, quartiles], names = ['category','quartile'] )

summary_df = pd.DataFrame(np.nan, columns = columns, index = my_index)

for cat in cats:
    for perc in quartiles:
        for var in columns[:-1]:
            print('cat: {0}, perc: {1}, var {2}'.format(cat,perc,var))
            if cat == 'all':
                summary_df.loc[ (cat, perc), var] = np.percentile( results_df.loc[ results_df['km_ccei']<1. , var ], perc )
            else:
                summary_df.loc[ (cat, perc), var] = np.percentile( results_df.loc[ (results_df['km_ccei']<1.) & (results_df['cat'] == cat) , var ], perc )

summary_df.to_excel(summary_file)


corr_file = 'nested_crra_correlations.xlsx'
corr_df = results_df.corr()
corr_df.to_excel(corr_file)


